	
	/******************************************************************************\
	|    Title:        	 Replication code for Globalization, Government            |
	|                    Popularity, and the Great Skill Divide     			   |
	|    Date:         	 September 2023                                            |
	|	 Description:    ESS data cleaning and merging         					   |
	\******************************************************************************/


*******************************************************************************
**#            	   I -  Globals and variable definitions 
*******************************************************************************
	
	* Load the raw ESS file and discard the five countries listed in the drop below
	use "$data/raw/ESS_raw.dta", clear
	
	drop if inlist(cntry, "AL", "LV", "ME", "RO", "XK")
	
	* Stack the supplementary files from the raw folder onto the main file
	local extra_files ESS2IT ESS4AT ESS4LT ESS5ATe1_1
	foreach extra of local extra_files {
		append using "$data/raw/`extra'"
	}
	
	
	/* Occupation
	* Categorisation of occupation is based on ISCO88 for waves 1-5 and on ISCO08 for
	* waves 6-8. Both are collapsed to ten groups and merged into one variable.
	*
	* Every group is matched on its full thousand-range. recode with generate()
	* copies any value that no rule matches, so a rule that stops at a specific
	* code (such as 5220 or 9330) lets higher codes of the same group leak into
	* occup as raw 4-digit values, which the skill dummies below would score as 0. */
	
	recode iscoco (0/999=1 "Armed Forces") (1000/1999=2 "Legislators, Senior Officials, Managers") ///
	(2000/2999=3 "Professionals") (3000/3999=4 "Technicians/Associate professionals") ///
	(4000/4999=5 "Clerks") (5000/5999=6 "Service workers/Sales workers") (6000/6999=7 "Skilled agricultural workers") ///
	(7000/7999=8 "Craft workers") (8000/8999=9 "Plant and machine operators") (9000/9999=10 "Elementary occupations") ///
	(66666 77777 88888 99999=.), gen(occup)

	recode isco08 (0/999=1 "Armed Forces") (1000/1999=2 "Legislators, Senior Officials, Managers") ///
	(2000/2999=3 "Professionals") (3000/3999=4 "Technicians/Associate professionals") ///
	(4000/4999=5 "Clerks") (5000/5999=6 "Service workers/Sales workers") (6000/6999=7 "Skilled agricultural workers") ///
	(7000/7999=8 "Craft workers") (8000/8999=9 "Plant and machine operators") (9000/9999=10 "Elementary occupations") ///
	(66666 77777 88888 99999=.), gen(occup_w68)

	* Fill in from the ISCO08-based variable wherever the ISCO88-based one is empty;
	* missing() also covers extended missing values (.a-.z), not only "."
	replace occup = occup_w68 if missing(occup)
	lab var occup "Type of occupation, ISCO classification"

	* High-Medium-Low skill:
	* Groups 2-4 are high, 5-9 medium, 10 low. Group 1 (Armed Forces) is 0 on all
	* three dummies; respondents without an occupation are missing on all three.
	gen highskilled = inlist(occup,2,3,4)     if !missing(occup)
	gen midskilled  = inlist(occup,5,6,7,8,9) if !missing(occup)
	gen lowskilled  = (occup==10)             if !missing(occup)
	

	
	* Generate country_gallup to merge with export data:
	* cntry holds the two-letter country code. country_gallup is the country name
	* and stata_id the numeric country id; stata_id (with year) is the key of the
	* merges at the end of this file.
	levelsof cntry
	gen country_gallup = ""
	gen stata_id = .

	quietly {
	replace country_gallup = "Austria" if cntry == "AT"
	replace stata_id=8		if country_gallup=="Austria"
	
	replace country_gallup = "Belgium" if cntry == "BE"
	replace stata_id=15		if country_gallup=="Belgium"
	
	replace country_gallup = "Bulgaria" if cntry == "BG"
	replace stata_id=25		if country_gallup=="Bulgaria"
	
	replace country_gallup = "Switzerland" if cntry == "CH"
	replace stata_id=171	if country_gallup=="Switzerland"
	
	replace country_gallup = "Cyprus" if cntry == "CY"
	replace stata_id=44		if country_gallup=="Cyprus"
	
	replace country_gallup = "Czech Republic" if cntry == "CZ" /*Czech Republic has no trade data*/
	replace stata_id=45		if country_gallup=="Czech Republic"
	
	replace country_gallup = "Germany" if cntry == "DE"
	replace stata_id=64		if country_gallup=="Germany"
	
	replace country_gallup = "Denmark" if cntry == "DK"
	replace stata_id=46		if country_gallup=="Denmark"
	
	replace country_gallup = "Estonia" if cntry == "EE"
	replace stata_id=55		if country_gallup=="Estonia"
	
	replace country_gallup = "Spain" if cntry == "ES"
	replace stata_id=164	if country_gallup=="Spain"
	
	replace country_gallup = "Finland" if cntry == "FI"
	replace stata_id=59		if country_gallup=="Finland"
	
	replace country_gallup = "France" if cntry == "FR"
	replace stata_id=60		if country_gallup=="France"
	
	replace country_gallup = "United Kingdom" if cntry == "GB"
	replace stata_id=188	if country_gallup=="United Kingdom"
	
	replace country_gallup = "Greece" if cntry == "GR"
	replace stata_id=66		if country_gallup=="Greece"
	
	replace country_gallup = "Croatia" if cntry == "HR"
	replace stata_id=42		if country_gallup=="Croatia"
	
	replace country_gallup = "Hungary" if cntry == "HU"
	replace stata_id=75		if country_gallup=="Hungary"
	
	replace country_gallup = "Ireland" if cntry == "IE"
	replace stata_id=81		if country_gallup=="Ireland"
	
	replace country_gallup = "Israel" if cntry == "IL"
	replace stata_id=82		if country_gallup=="Israel"
	
	replace country_gallup = "Iceland" if cntry == "IS"
	replace stata_id=76		if country_gallup=="Iceland"
	
	replace country_gallup = "Italy" if cntry == "IT"
	replace stata_id=83		if country_gallup=="Italy"
	
	replace country_gallup = "Lithuania" if cntry == "LT"
	replace stata_id=102	if country_gallup=="Lithuania"
	
	replace country_gallup = "Luxembourg" if cntry == "LU"
	replace stata_id=103	if country_gallup=="Luxembourg"
	
	replace country_gallup = "Netherlands" if cntry == "NL"
	replace stata_id=125	if country_gallup=="Netherlands"
	
	replace country_gallup = "Norway" if cntry == "NO"
	replace stata_id=131	if country_gallup=="Norway"
	
	replace country_gallup = "Poland" if cntry == "PL"
	replace stata_id=140	if country_gallup=="Poland"
	
	replace country_gallup = "Portugal" if cntry == "PT"
	replace stata_id=141	if country_gallup=="Portugal"
	
	* "RS" is the ISO 3166 code for Serbia; Russia is "RU". Serbian respondents
	* must therefore not receive Russia's name and stata_id. No stata_id for
	* Serbia is visible in this file, so it stays missing and these rows cannot
	* match in the stata_id-year merges below.
	* TODO(review): add Serbia's stata_id here if the trade data cover Serbia.
	replace country_gallup = "Serbia" if cntry == "RS"
	
	replace country_gallup = "Russian Federation" if cntry == "RU"
	replace stata_id=145	if country_gallup=="Russian Federation"
	
	replace country_gallup = "Sweden" if cntry == "SE"
	replace stata_id=170	if country_gallup=="Sweden"
	
	replace country_gallup = "Slovenia" if cntry == "SI"
	replace stata_id=157	if country_gallup=="Slovenia"
	
	replace country_gallup = "Slovakia" if cntry == "SK"
	replace stata_id=155	if country_gallup=="Slovakia"
	
	replace country_gallup = "Turkey" if cntry == "TR"
	replace stata_id=182	if country_gallup=="Turkey"
	
	replace country_gallup = "Ukraine" if cntry == "UA"
	replace stata_id=186	if country_gallup=="Ukraine"
	
	}
	* Eyeball the result: every country kept should show a name and an id
	tab country_gallup
	tab stata_id

	
	* Year:
	* Interview year comes from inwyr, or from inwyye where inwyr is system-missing
	generate year = cond(inwyr == ., inwyye, inwyr)
	replace year = . if year == 9999 // 9999 is "not available"
	label variable year "Year of interview, original"
	
	* Imputed year:
	* Where the interview year is system-missing, fall back on the nominal year of
	* the round: round 1 -> 2002, round 2 -> 2004, ..., round 9 -> 2018
	generate year2 = year
	forvalues wave = 1/9 {
		local offset = 2*`wave'
		replace year2 = 2000 + `offset' if year2 == . & essround == `wave'
		display `offset'
	}
	label variable year2 "Year of interview, imputed"

	* From here on "year" is the imputed version; the original is kept as year_missing
	rename year year_missing
	rename year2 year

	* Wave year:
	generate waveyear = 2*essround + 2000
	tabulate waveyear
	
	* Male dummy:
	* gender copies gndr with 9 set to missing; male is 1 for men, 0 otherwise,
	* and system-missing where gender is system-missing
	recode gndr (9=.) (2=2 "Female") (1=1 "Male"), generate(gender) // 9 is "no answer"
	label variable gender "Gender"
	generate male = (gender == 1) if gender != .
	label variable male "Male"

	* Age and age squared:
	* The existing age variable is discarded and agea takes over its name
	drop age
	rename agea age // note that few people are aged above 100
	replace age = . if age == 999 // 999 is "not available"
	label variable age "Age of respondent"

	generate age2 = age^2

	* Married/civil partnership & divorced/separated:
	* mstatus has four categories: 1 married/partnership, 2 separated/divorced,
	* 3 widowed/partner died, 4 never married/partnered. It is seeded from marital
	* and then filled from the other marital-status variables in turn.

	* for wave 1 & 2
	recode marital (7 8 9=.) (5=4 "Never married/partnered") (4=3 "Widowed/partner died") ///
	(2 3=2 "Separated/divorced") (1=1 "Married/partnership"), generate(mstatus) //  7 is "Refusal", 8 is "Don't know", 9 is "No answer"
	label variable mstatus "Marital status"
	
	* for wave 1 & 2, French questionnaire
	* The French questionnaire in wave 1 & 2 also included a separate category
	* "Pacte civil de solidarité" (code 6), a legal contract. Since partnerships and
	* marriage are combined in mstatus, it is added to "married/partnership".
	replace mstatus = 1 if inlist(martlfr, 1, 6)
	replace mstatus = 2 if inlist(martlfr, 2, 3)
	replace mstatus = 3 if martlfr == 4
	replace mstatus = 4 if martlfr == 5
	replace mstatus = . if inlist(martlfr, 7, 8)
	
	* for wave 3 & 4
	replace mstatus = 1 if inlist(maritala, 1, 2)
	replace mstatus = 2 if inlist(maritala, 3, 4, 5, 7)
	replace mstatus = 3 if inlist(maritala, 6, 8)
	replace mstatus = 4 if maritala == 9
	
	* for wave 5, 6, 7 & 8
	replace mstatus = 1 if inlist(maritalb, 1, 2)
	replace mstatus = 2 if inlist(maritalb, 3, 4)
	replace mstatus = 3 if maritalb == 5
	replace mstatus = 4 if maritalb == 6
	
	* Dummies derived from mstatus; missing wherever mstatus is missing
	generate partnered = (mstatus == 1) if !missing(mstatus)
	generate divorced_separated = (mstatus == 2) if !missing(mstatus)
	
	* Presence of children in the household:
	* (not constructed: the two lines below are a disabled, unfinished draft)
	*gen children = (chldhm==1)
	*replace children = . if 
	
	* Urban:
	* domicil 1-2 count as urban (1), 3-5 as not urban (0)
	recode domicil (7 8 9=.) (3 4 5=0) (1 2=1), generate(urban) // 7 is "refused", 8 is "don't know", 9 is "no answer"
	label variable urban "Urban"

	* Education variable:
	* edu has four categories: 1 less than secondary, 2 secondary/post-secondary
	* non-tertiary, 3 tertiary, 4 other. It is seeded from edulvla.
	recode edulvla (1=1 "Less than secondary") (2 3 4=2 "Secondary/post-secondary non-tertiary") ///
	(5=3 "Tertiary") (0 55=4 "Other") (77 88 99=.), gen(edu) // 77 is "Refusal", 88 is "Don't know", 99 is "No answer"
	lab var edu "Eduation level (4 categories)"

	** use edulvlb for rounds 8 and 9
	* The three-digit edulvlb codes are grouped by their leading digit(s); bounds
	* are inclusive. Category 2 has to start at 200: with a lower bound of
	* "> 300" the codes 200-299 matched no rule, so edu (and with it tertiary and
	* lessthantertiary) stayed unassigned for those respondents.
	* NOTE(review): this assumes edulvlb 2xx is lower secondary, i.e. the same
	* level as edulvla==2, which is placed in category 2 above -- confirm against
	* the ESS codebook.
	replace edu=1 if edulvlb < 200				& inlist(essround,8,9)
	replace edu=2 if inrange(edulvlb,200,499)	& inlist(essround,8,9)
	replace edu=3 if inrange(edulvlb,500,899)	& inlist(essround,8,9)
	replace edu=4 if edulvlb == 5555			& inlist(essround,8,9)

	* Tertiary vs. less than tertiary; "Other" (4) and missing edu are left missing
	gen tertiary = (edu==3) if !missing(edu) & edu != 4
	gen lessthantertiary = (edu<3) if !missing(edu) & edu != 4
	
	
	* Income:
	* Rounds 1-3 report household income in 12 lettered bands (hinctnt). These are
	* turned into within-country-round deciles by drawing a random Euro amount
	* inside each band. hinctnta has ten categories, which are used directly as
	* the decile further down.
	recode hinctnt (1=1 "J") (2=2 "R") (3=3 "C") (4=4 "M") (5=5 "F") (6=6 "S") ///
	(7=7 "K") (8=8 "P") (9=9 "D") (10=10 "H") (11=11 "U") (12=12 "N") (77 88 99=.), gen(originc_w13)

	* Generate cutoffs of income groups in Euros
	* The k-th entry of each list belongs to band k. Every upper bound is one Euro
	* below the next band's lower bound, so band 11 ends at 119999 (band 12 starts
	* at 120000); the top band is capped at 150000.
	local lower_bounds 0 1800 3600 6000 12000 18000 24000 30000 36000 60000 90000 120000
	local upper_bounds 1799 3599 5999 11999 17999 23999 29999 35999 59999 89999 119999 150000

	gen hinctnt_lower = .
	label var hinctnt_lower "HH income lower Euro cut-offs of categories"
	gen hinctnt_higher = .
	label var hinctnt_higher "HH income higher Euro cut-offs of categories"

	forvalues band = 1/12 {
		local lo : word `band' of `lower_bounds'
		local hi : word `band' of `upper_bounds'
		replace hinctnt_lower  = `lo' if originc_w13==`band' & essround<=3
		replace hinctnt_higher = `hi' if originc_w13==`band' & essround<=3
	}

	* Imputation of the "Income in Euros"- variable using uniform random values between
	* the upper and lower cut-offs
	* NOTE(review): no "set seed" appears in this file. Unless the calling do-file
	* sets one, the draws -- and therefore the deciles -- differ between runs.
	gen income_random = hinctnt_lower + (hinctnt_higher - hinctnt_lower)*runiform()
	label var income_random "HH income in Euro: random values between cut-offs"

	* Variables containing the upper decile cut-offs
	* by country and round. Each var = one decile.
	sort cntry essround
	forvalues p = 10(10)90 {
		by cntry essround: egen income_q`p' = pctile(income_random) if essround<=3, p(`p')
	}

	* Assign individuals to deciles
	* Decile d covers [income_q(10(d-1)), income_q(10d)); only observations that
	* are still unassigned are filled, so no earlier assignment is overwritten.
	gen income_q = .
	replace income_q = 1 if income_random < income_q10
	forvalues d = 2/9 {
		local below = 10*(`d'-1)
		local above = 10*`d'
		replace income_q = `d' if missing(income_q) ///
			& income_random >= income_q`below' & income_random < income_q`above'
	}
	* income_random != . keeps respondents without an imputed income out of decile 10
	replace income_q = 10 if missing(income_q) & income_random >= income_q90 & income_random != .

	label var income_q "HH income deciles - imputed"

	* Add information from hinctnta (later rounds): categories 1-10 are taken
	* over as the decile
	recode hinctnta (1=1 "J") (2=2 "R") (3=3 "C") (4=4 "M") (5=5 "F") (6=6 "S") ///
	(7=7 "K") (8=8 "P") (9=9 "D") (10=10 "H") (77 88 99=.), gen(originc_w48)

	replace income_q = originc_w48 if inlist(originc_w48,1,2,3,4,5,6,7,8,9,10)
	tab income_q

		* Employment status:
	* lfstat keeps mnactic categories 1-9 under their labels and sets the codes
	* 66, 77, 88 and 99 to missing
	recode mnactic ///
		(66 77 88 99=.) ///
		(1=1 "Paid work") ///
		(2=2 "Education") ///
		(3=3 "Unemployed, looking for job") ///
		(4=4 "Unemployed, not looking for job") ///
		(5=5 "Permanently sick/disabled") ///
		(6=6 "Retired") ///
		(7=7 "Community/military service") ///
		(8=8 "Housework/looking after children") ///
		(9=9 "Other"), generate(lfstat)
	label variable lfstat "Labour force status in last 7 days, ESS categories"
	tabulate lfstat
	
	
	* Year distribution before merging
	tabulate year
	
	* Merge with newly created IVs and trade data:
	* Both merges are keyed on stata_id and year, and only rows found in both the
	* survey data and the trade file (_merge == 3) are kept.
	merge m:1 stata_id year using "$data/2002_2018_Exports_withIV.dta"
	keep if _merge == 3
	drop _merge
	* Year distribution after restricting to matched rows
	tabulate year


	merge m:1 stata_id year using "$data/2002_2018_Imports_withIV.dta"
	keep if _merge == 3
	drop _merge
	
	
	save "$data/ESS_cleaned.dta", replace